# ==============================================================================
# DESCRIPTION ----
# ==============================================================================

# Conducts differences-in-differences analysis of effect of former 
# bureaucrats on NPO's negotiated contract values using monthly aggregated data. 

# ==============================================================================
# PREAMBLE, LIBRARIES, AND IMPORT ----
# ==============================================================================
# Preamble, packages -----------------------------------------------------------
library(tidyverse)
library(DIDmultiplegt)
library(plm)
library(panelView)
library(benford.analysis)
library(gridExtra)
library(tsibble)

# Functions
start_time <- Sys.time()
source("code/0.2_functions.R")

# Import subsidy data
load("data/jnpo.Rdata")

# ==============================================================================
# TRANSFORM SUBSIDY DATA TO TIME SERIES ----
# ==============================================================================

# Transform into monthly time series dataset -----------------------------------
# Collapse data to firm year-month level: without ministry information
npo_amakudari <- jnpo %>%
  mutate(grantee_clean = str_replace(grantee_clean, "ー", "-")) %>%
  filter(!is.na(govt_reemployees), amount > 0, grant_year > 2007) %>%
  mutate(amakudari = as.numeric(as.character(govt_reemployees))) %>%
  group_by(grantee_clean, grant_month) %>%
  summarize(amount = sum(amount), amakudari_n = max(amakudari), .groups = "drop") %>%
  mutate(
    amakudari = ifelse(amakudari_n > 0, 1, 0),
    amount_log = log(amount),
    amount = amount / 1000000
  ) %>%
  ungroup()

# Collapse data to firm year-month level: with ministry information
npo_amakudari_min <- jnpo %>%
  filter(!is.na(govt_reemployees), amount > 0) %>%
  mutate(amakudari = as.numeric(as.character(govt_reemployees))) %>%
  group_by(grantee_clean, grant_month, granter_ministry) %>%
  summarize(amount = sum(amount), amakudari_n = max(govt_reemployees)) %>%
  mutate(
    amakudari = ifelse(amakudari_n > 0, 1, 0),
    amount_log = log(amount),
    amount = amount / 1000000
    )

# ==============================================================================
# ANALYSIS USING DiDM ESTIMATOR ----
# ==============================================================================
# Define parameters for log estimation
Y = "amount_log"
G = "grantee_clean"
T = "grant_month"
D_binary = "amakudari"
D_cont = "amakudari_n"

# Run models for log estimation
set.seed(123)
par(mar=c(4,4,4,4), mfrow=c(2,1), cex.axis=0.5, cex.main=0.75)
neg_binary <- did_multiplegt(npo_amakudari, Y, G, T, D_binary, placebo = 3, brep = 50)
abline(h=0, col="black", lty = 2)
title(main = "Bureaucratic hires (binary)")
neg_cont <- did_multiplegt(npo_amakudari, Y, G, T, D_cont, placebo = 3, brep = 50)
abline(h=0, col="black", lty = 2)
title(main = "Bureaucratic hires (continuous)")

# Define parameters for level estimation
Y = "amount"

# Run models for level estimation
set.seed(123)
par(mar=c(4,4,4,4), mfrow=c(2,1), cex.axis=0.5, cex.main=0.75)
neg_binary_level <- did_multiplegt(npo_amakudari, Y, G, T, D_binary, placebo = 3, brep = 50)
abline(h=0, col="black", lty = 2)
title(main = "Bureaucratic hires (binary)")
neg_cont_level <- did_multiplegt(npo_amakudari, Y, G, T, D_cont, placebo = 3, brep = 50)
abline(h=0, col="black", lty = 2)
title(main = "Bureaucratic hires (continuous)")

# Plot models ------------------------------------------------------------------
# Plot models: binary
binary_plot <- as_tibble(neg_binary) %>%
  select(-contains("N"),
         effect_1 = placebo_1, effect_2 = placebo_2, effect_3 = placebo_3,
         se_effect_1 = se_placebo_1, se_effect_2 = se_placebo_2,
         se_effect_3 = se_placebo_3) %>%
  pivot_longer(cols = c(starts_with("effect"), starts_with("se"))) %>%
  mutate(
    effect = ifelse(str_detect(name, "se"), "se", "effect"),
    time = c(-3, -2, -1, 0, -3, -2, -1, 0)) %>%
  select(-name) %>%
  pivot_wider(names_from = effect, values_from = value) %>%
  mutate(model = "Bureaucratic hires (binary)")

# Continuous plot
cont_plot <- as_tibble(neg_cont) %>%
  select(-contains("N"),
         effect_1 = placebo_1, effect_2 = placebo_2, effect_3 = placebo_3,
         se_effect_1 = se_placebo_1, se_effect_2 = se_placebo_2,
         se_effect_3 = se_placebo_3) %>%
  pivot_longer(cols = c(starts_with("effect"), starts_with("se"))) %>%
  mutate(
    effect = ifelse(str_detect(name, "se"), "se", "effect"),
    time = c(-3, -2, -1, 0, -3, -2, -1, 0)) %>%
  select(-name) %>%
  pivot_wider(names_from = effect, values_from = value) %>%
  mutate(model = "Bureaucratic hires (continuous)")

plot <- rbind(binary_plot, cont_plot)

# FIGURE 7: Create combined plot
ggplot(plot, aes(time, effect)) +
  geom_point(color = "steelblue2", size = 1.5) +
  geom_line(color = "grey80") +
  geom_errorbar(aes(x = time,
                     ymin = effect - 1.96*se,
                     ymax = effect + 1.96*se),
                 color="grey30", size=0.5, alpha = 0.5, width = 0.1) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  ylab("Effect of appointment on log(contract value)") +
  xlab("Time (periods before hire)") +
  scale_y_continuous(limits = c(-1, 1), breaks=seq(-1, 1, 0.5)) +
  facet_wrap(~model) +
  theme_classic()

# FIGURE 7
ggsave("figures/7_npo_amakudari.pdf", height = 3.5, width = 7)

# Create tables ----------------------------------------------------------------
# Binary: logs
neg_binary_table <- as_tibble(neg_binary)
neg_binary_table <- bind_rows(
  neg_binary_table %>% transmute(time = 3, Effect = placebo_3, SE = se_placebo_3, N = N_placebo_3),
  neg_binary_table %>% transmute(time = 2, Effect = placebo_2, SE = se_placebo_2, N = N_placebo_2),
  neg_binary_table %>% transmute(time = 1, Effect = placebo_1, SE = se_placebo_1, N = N_placebo_1),
  neg_binary_table %>% transmute(time = 0, Effect = effect, SE = se_effect, N = N_effect)
  )
neg_binary_table <- neg_binary_table %>%
  mutate(across(Effect:SE, ~round(., 2)))

# TABLE A20
stargazer::stargazer(
  neg_binary_table, summary = FALSE, digits = 2, rownames = FALSE, font.size = "footnotesize",
  title = "Effect of amakudari appointments on NPO negotiated contract value (binary outcome)",
  label = "tab: npo_binary",
  out = "tables/a20_npo_binary.tex")

# Continuous: logs
neg_cont_table <- as_tibble(neg_cont)
neg_cont_table <- bind_rows(
  neg_cont_table %>% transmute(time = 3, Effect = placebo_3, SE = se_placebo_3, N = N_placebo_3),
  neg_cont_table %>% transmute(time = 2, Effect = placebo_2, SE = se_placebo_2, N = N_placebo_2),
  neg_cont_table %>% transmute(time = 1, Effect = placebo_1, SE = se_placebo_1, N = N_placebo_1),
  neg_cont_table %>% transmute(time = 0, Effect = effect, SE = se_effect, N = N_effect)
)
neg_cont_table <- neg_cont_table %>%
  mutate(across(Effect:SE, ~round(., 2)))

# TABLE A21
stargazer::stargazer(
  neg_cont_table, summary = FALSE, digits = 2, rownames = FALSE, font.size = "footnotesize",
  title = "Effect of amakudari appointments on NPO negotiated contract value (continuous outcome)",
  label = "tab: npo_cont",
  out = "tables/a21_npo_cont.tex")

end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# ==============================================================================
# ANALYSIS USING 2WFE ESTIMATORS ----
# ==============================================================================
# NOTE: There is an issue in the FECT package in which for some bootstrap samples
# all units might be treated (no control), or the control units might be present 
# only in non-pre-treatment periods, or only one control unit survives after 
# filtering. This causes res_boot[pre.pos, ] to become a vector, not a matrix,
# which triggers the following error:
# Error in res_boot[pre.pos, ] : incorrect number of dimensions

# As this creates difficulty providing reproducible code using the bootstrap, 
# uncertainty estimates using parametric variance estimators are used. 

# Bootstrapped standard errors can be calculated by setting vartype to 
# "bootstrap", but code may fail for some runs. 

# Package clashes with DIDmultiplegt: unload DIDm and load fect ----------------
detach("package:DIDmultiplegt", unload = TRUE)
library(fect)  # load cleanly

#### FEct, IFECT, and MC methods -----------------------------------------------
# FEct
set.seed(123)
fect <- fect(amount_log ~ amakudari, data = npo_amakudari, 
             index = c("grantee_clean", "grant_month"), 
             method = "fe", force = "two-way", na.rm = TRUE, se = TRUE,
             vartype = "parametric")

fect_plot <- plot(fect, main = "FEct", 
                  ylab = "Effect on log(contract value)",
                  cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8, proportion = 0.25)

set.seed(123)
fect_level <- fect(amount ~ amakudari, data = npo_amakudari, 
                   index = c("grantee_clean", "grant_month"), 
                   method = "fe", force = "two-way", na.rm = TRUE, se = TRUE,
                   vartype = "parametric")

fect_plot_level <- plot(fect_level, main = "FEct", 
                        ylab = "Effect on contract value",
                        cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8, proportion = 0.25)

# IFEct
set.seed(123)
ifect <- fect(amount_log ~ amakudari, data = npo_amakudari, 
              index = c("grantee_clean", "grant_month"), 
              method = "ife", force = "two-way", na.rm = TRUE, se = TRUE,
              vartype = "parametric")

ifect_plot <- plot(ifect, main = "IFEct", 
                   ylab = "Effect on log(contract value)", xlim = c(-2, 1),
                   cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8, proportion = 0.25)

set.seed(123)
ifect_level <- fect(amount ~ amakudari, data = npo_amakudari, 
                    index = c("grantee_clean", "grant_month"), 
                    method = "ife", force = "two-way", na.rm = TRUE, se = TRUE,
                    vartype = "parametric")

ifect_plot_level <- plot(ifect_level, main = "IFEct", 
                         ylab = "Effect on contract value", xlim = c(-2, 1),
                         cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8, proportion = 0.25)

# Matrix completion
set.seed(123)
mc <- fect(amount_log ~ amakudari, data = npo_amakudari,
           index = c("grantee_clean", "grant_month"), 
           method = "mc", force = "two-way", na.rm = TRUE, se = TRUE,
           vartype = "parametric")

mc_plot <- plot(mc, main = "Matrix Completion", 
                ylab = "Effect on log(contract value)", xlim = c(-2, 1),
                cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8, proportion = 0.25)

set.seed(123)
mc_level <- fect(amount ~ amakudari, data = npo_amakudari,
                 index = c("grantee_clean", "grant_month"), 
                 method = "mc", force = "two-way", na.rm = TRUE, se = TRUE,
                 vartype = "parametric")

mc_plot_level <- plot(mc_level, main = "Matrix Completion", 
                      ylab = "Effect on contract value", xlim = c(-2, 1),
                      cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8, proportion = 0.25)

plot_binary <- arrangeGrob(
  fect_plot, ifect_plot, mc_plot, fect_plot_level, ifect_plot_level, mc_plot_level,
  ncol = 3,
  top = "Estimated ATT by estimator and log or level (in million yen) outcome, with binary treatment"
)

# FIGURE A33
ggsave("figures/a33_npo_robust_monthly.pdf", plot_binary, height = 6, width = 10)

# Classic TWFE -----------------------------------------------------------------
# Binary amakudari indicator (log outcome)
fit_binary <- plm(amount_log ~ amakudari,
                  data = npo_amakudari, 
                  index = c("grantee_clean", "grant_month"), 
                  model = "within", 
                  effect = "twoways")

# Binary amakudari indicator (level outcome)
fit_binary_level <- plm(amount ~ amakudari,
                    data = npo_amakudari, 
                    index = c("grantee_clean", "grant_month"), 
                    model = "within", 
                    effect = "twoways")

# Continuous amakudari indicator (log outcome)
fit_continuous <- plm(amount_log ~ amakudari_n,
                      data = npo_amakudari, 
                      index = c("grantee_clean", "grant_month"), 
                      model = "within", 
                      effect = "twoways")

# Continuous amakudari indicator (level outcome)
fit_continuous_level <- plm(amount ~ amakudari_n,
                        data = npo_amakudari, 
                        index = c("grantee_clean", "grant_month"), 
                        model = "within", 
                        effect = "twoways")

# TABLE A22: Create table for classic 2WFE
stargazer::stargazer(fit_binary, fit_continuous, fit_binary_level, fit_continuous_level,
                     omit = "grantee_clean", 
                     omit.stat = c("adj.rsq", "rsq"),
                     type="latex",
                     out = "tables/a22_twfe_monthly.tex")

# ==============================================================================
# BENFORD'S LAW ANALYSIS ----
# ==============================================================================

# Create dataframes for each type of contact -----------------------------------
# All competitive bid contracts
bid <- jnpo %>%
  select(grant_date, grantee_clean, amount, competitive_bid) %>%
  filter(competitive_bid == "Competitive", amount > 0)

# All negotiated contracts
negotiated <- jnpo %>%
  mutate(amakudari = as.numeric(as.character(govt_reemployees))) %>%
  select(grant_date, grantee_clean, amount, govt_reemployees) %>%
  filter(govt_reemployees == 0, amount > 0)

# All negotiated contracts with amakudari on staff
negotiated_amakudari <- jnpo %>%
  mutate(amakudari = as.numeric(as.character(govt_reemployees))) %>%
  select(grant_date, grantee_clean, amount, govt_reemployees) %>%
  filter(govt_reemployees > 0, amount > 0)

# Analyze dataframes -----------------------------------------------------------
benford_bid = benford(bid$amount, number.of.digits = 1, discrete = TRUE, sign = "positive") 
benford_negotiated = benford(negotiated$amount, number.of.digits = 1, discrete = TRUE, sign = "positive") 
benford_negotiated_amakudari = benford(negotiated_amakudari$amount, number.of.digits = 1, discrete = TRUE, sign = "positive") 

# Plot results of analysis -----------------------------------------------------
# FIGURE A36
# NOTE: Cannot export first digit plot programmatically. 
# Needs to be manually cropped
pdf("figures/a36a_benford_bid.pdf", width = 10, height = 8)
plot(benford_bid)
dev.off()

pdf("figures/a36b_benford_neg.pdf", width = 10, height = 8)
plot(benford_negotiated)
dev.off()

pdf("figures/a36c_benford_neg_amakudari.pdf", width = 10, height = 8)
plot(benford_negotiated_amakudari)
dev.off()
